# This file contains the S-plus code that was used to implement the estimators and functionals in "Local polynomial fitting in failure rate estimation", IEEE Transactions on reliability, (57): 41-52, (2008) # As a fully working example the code herein can reproduce Figure 3 of the paper (the relevant data are also given below). # The same functions (with different input data) can be used in reproducing other figures of the same paper # More general use of the estimates is also feasible by supplying the corresponding data set. Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function. #N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible. suicdata<-c( 0.01,0.01, 0.01, 0.01, 0.01, 0.01, 0.02,0.02, 0.02,0.02, 0.03,0.04,0.06,0.08, 0.1,0.1, 0.12,0.12,0.12, 0.13,0.14, 0.15,0.15,0.15, 0.16, 0.16,0.17, 0.18,0.18, 0.19,0.20, 0.21, 0.22, 0.23,0.25, 0.26, 0.28, 0.28, 0.30, 0.32, 0.34, 0.36, 0.38, 0.39, 0.41, 0.41, 0.42, 0.43, 0.44, 0.44, 0.45, 0.45, 0.5, 0.53, 0.56, 0.58, 0.58, 0.61, 0.62, 0.62, 0.62, 0.64, 0.66, 0.70, 0.70, 0.70, 0.72, 0.77, 0.78, 0.78, 0.80, 0.82, 0.83, 0.85, 0.86, 0.96, 0.97, 0.98, 0.99, 1.05, 1.06, 1.07, 1.18, 1.35, 1.36, 1.42, 1.55, 1.59, 1.65, 1.73, 1.77, 1.79, 1.80, 1.91, 2.09, 2.14, 2.15, 2.15, 2.31, 2.33, 2.36, 2.36, 2.43, 2.45, 2.50, 2.51, 2.58, 2.64, 2.68, 3.08, 3.94, 4.12, 4.33, 4.42, 4.53, 4.88, 4.97, 5.11, 5.32, 5.55, 6.63, 6.89, 7.62, 11.41, 11.76, 11.85, 12.36, 13.22) cat(" ", "\n", mean(suicdata), " ", length(suicdata), "\n") "NmaxSelection"<-function(n, Nstar) #implementation of equation 10 in Ruppert, Sheather and Wand, JASA (1995) #n = sample size, Nstar = 5 or higher { max(min(trunc(n/20), Nstar),1) } "CreateSubsamples"<-function(xin, yin, N) #This divides the (ordered) sample to N equally sized subsamples #Also divides the ordered, according to the X_i's true values - Y_i - to subsamples, so that they correspond to the X_i subsamples #The result is returned in matrix form. { index<-order(xin) xin<- xin[index] yin<-yin[index] n<- length(xin) t<- n/N #cat("t = ", t) if(is.integer(t) != T){ t <- trunc(t)} #if N does not divide n then truncate t to the nearest integer to zero (some observations are lost) SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=N) #put the subsamples (in blocks of N observations) in a matrix: column wise SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=N) list(SubsamplesMatrixX, SubsamplesMatrixY) } "EstimateNhat"<-function(xin, yin) # This, by minimizing Mallow's Cp by quartic regression (see Ruppert, Sheather and Wand, JASA (1995), page 1262) # finds and returns N = number of blocks to be used in order to find an estimate of the regression function { Nstar<-5 n<- length(xin) N<-NmaxSelection(n, Nstar) index<-order(xin) xin<- xin[index] yin<-yin[index] Nseq<-seq(1, N, by =1) RSS<-vector("numeric") for(i in 1:N) { t<- n/Nseq[i] if(is.integer(t) != T){ t <- trunc(t)} #if Nseq does not divide n then truncate t to the nearest integer to zero (some observations are lost) SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=Nseq[i]) #put the subsamples (in blocks of N observations) in a matrix: column wise SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=Nseq[i]) #Coefficients.Matrix<-matrix(ncol=5, nrow= Residuals.Matrix<-matrix(0,nrow=Nseq[i], ncol=t) #NOMIZO - MAKE SURE Fit.Matrix<-matrix(0,nrow=Nseq[i], ncol=t) for(j in 1:Nseq[i]) { arg1<-lm(SubsamplesMatrixY[j,]~poly(SubsamplesMatrixX[j,], degree=4)) Residuals.Matrix[j,]<-resid(arg1) Fit.Matrix[j,]<-fitted.values(arg1) } RSS[i]<-sum(Residuals.Matrix^2) } Cp<-RSS/(RSS[N]/(n-5*N)) - (n-10*N) index<-order(Cp)[1] index } "QuarticRegressionEstimate"<-function(xin, yin) { N<-EstimateNhat(xin, yin) n <-length(xin) t<-n/N if(is.integer(t) != T){ t <- trunc(t)} SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=N) #put the subsamples (in blocks of N observations) in a matrix: column wise SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=N) Fit.Matrix<-matrix(0,nrow=N, ncol=t) Coefficients.Matrix<-matrix(0, ncol=5, nrow=N) for(j in 1:N) { polyarg<-poly(SubsamplesMatrixX[j,], degree=4) arg1<-lm(SubsamplesMatrixY[j,]~polyarg) Fit.Matrix[j,]<-fitted.values(arg1) Coefficients.Matrix[j,]<-poly.transform(polyarg, coef(arg1)) } out<-list(Coefficients.Matrix, SubsamplesMatrixX, SubsamplesMatrixY, Fit.Matrix) #return the coefficients and the fit - first argument: coefficients. second argument : fit. names(out)<-c("Coefficients.Matrix", "SubsamplesMatrixX", "SubsamplesMatrixY", "Fit.Matrix") out } "SecondDerivativeQuarticEstimate"<-function(xin, yin) { CoefAndFit<-QuarticRegressionEstimate(xin,yin) CoefficientsMatrix<-CoefAndFit$Coefficients.Matrix #cat(CoefficientsMatrix) SubsamplesMatrixX<-CoefAndFit$SubsamplesMatrixX SubsamplesMatrixY<-CoefAndFit$SubsamplesMatrixY SecondDerivIntercept<-2*CoefficientsMatrix[,3] SecondDerivBeta1<-6*CoefficientsMatrix[,4] SecondDerivBeta2<-12*CoefficientsMatrix[,5] SubsamplesMatrixXsquared<-SubsamplesMatrixX^2 FittedOutMatrix<- SecondDerivIntercept+ SecondDerivBeta1*SubsamplesMatrixX+ SecondDerivBeta2*SubsamplesMatrixXsquared #plot(as.vector(SubsamplesMatrixX), as.vector(FittedOutMatrix), type="l") #lines(xin, .5*(-.5)*(-1.5)*.8^.5*(xin^(-2.5))) as.vector(FittedOutMatrix) } "LambdaHat"<- #Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964)) # -order statistics version. function(xin, xout, h, kfun){ n<- length(xin) n1<-1:n nn<-length(xout) xin.use<-sort(xin) #cat(xin.use) hazard.rate<-vector("numeric",nn) for(i in 1:nn){ hazard.rate[i]<-(1/h)*sum((kfun((xout[i]-xin.use)/h))/(n-n1+1)) } hazard.rate } "Epanechnikov"<- function(x){ #Epanechikov kernel, square root of 5 is taken as 2.236068 (to save comp. time). ifelse(abs(x)< 2.236068, (3/4)*(1-((x^2)/5 ))/2.236068, ifelse(abs(x)>2.236068, 0,0)) } "sn.0"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)), xin, xout, h, kfun) } "sn.1"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*(xin-xout[i])), xin, xout, h, kfun) } "sn.2"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^2)), xin, xout, h, kfun) } "sn.3"<-function(xin, xout, h, kfun){sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^3)), xin, xout, h, kfun) } "sn.4"<-function(xin, xout, h, kfun) {sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^4)), xin, xout, h, kfun) } "sn.5"<-function(xin, xout, h, kfun){sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^5)), xin, xout, h, kfun) } "sn.6"<-function(xin, xout, h, kfun) {sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^6)), xin, xout, h, kfun) } "tn.0"<-function(xin, xout, h, kfun, Y) { sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*Y), xin, xout, h, kfun, Y) } "tn.1"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*((xin-xout[i])*Y)),xin, xout, h, kfun, Y) } "tn.2"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*((xin - xout[i])^2 )*Y),xin, xout, h, kfun, Y) } "tn.3"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*((xin - xout[i])^3 )*Y),xin, xout, h, kfun, Y) } "IntEpanechnikov"<-function(x) { ifelse(x< -2.236068, 0, ifelse(x> 2.236068, 1, .5- (2.236068*x*(x^2-15))/100)) } "Ldoubledash"<-function(x) { dnorm(x,0,1)*(x^2-1) } "PsiEst"<-function(x,g) #estimate of the \psi_2 functional to be used for the altman method. { n<-length(x) Muse<-matrix(0, n,n) for(i in 1:n) { for(j in 1:n) { Muse[i,j]<-Ldoubledash((x[i]-x[j])/g) } } sum(Muse)/(n^2) } "AltmanBand"<-function(x,g) { (45/(-7*PsiEst(x,g)*length(x)))^(1/3) } "IntKde"<-function(xin, xout, h, kfun) { n<- length(xin) arg1<-(sapply(xout, "-", xin))/h arg2<-kfun(arg1) arg3<-colSums(arg2) arg3/n } "PilotWidth1"<- function(sample) { #returns Silverman's default width 1.06 A n^(-1/5) n<-length(sample) StD<-stdev(sample) IqR<-diff(quantile(sample, c(.25, .75))) tmp<-min(StD, IqR/1.34) DefWidth<- 1.06*tmp*n^(-1/5) DefWidth } "PlugInOptBand"<-function(xin, xout, kfun, IntKfun, ci) { N<-length(xin) a<-PilotWidth1(xin) b<-AltmanBand(xin, (length(xin))^(-.3)) c<-IntKde(xin, max(xout), b, IntKfun) d<-IntKde(xin, max(xout), a, IntKfun) Muse<-1/(1-IntKde(xin, max(xout), b, IntKfun)) -1 ThetaUse<-Theta22(xin, a, kfun, ci) top<-.6 #*Muse #R(K)*M, M=1/(1-F(T))-1) bottom<- N * (2/15) * ThetaUse #N*\mu_2^2(K)*\theta_2^2 cat("a=", a, " ", "b= ", b, " ","c= ", c, " ", "d= ", d, " ", "M = ", Muse, " ", "theta22 = ", ThetaUse) (top/bottom)^0.2 } "Theta22"<-function(xin, PilotBand, kfun, ci) { theta= (2*b2(xin, xin, PilotBand, Epanechnikov, ci))^2 sum(theta) } "b2"<-function(xin, xout, h, kfun, ci) { sn0<-sn.0(xin, xout, h, kfun) sn1<-sn.1(xin, xout, h, kfun) sn2<-sn.2(xin, xout, h, kfun) sn3<-sn.3(xin, xout, h, kfun) sn4<-sn.4(xin, xout, h, kfun) tn0<-tn.0(xin, xout, h, kfun, ci) tn1<-tn.1(xin, xout, h, kfun, ci) tn2<-tn.2(xin, xout, h, kfun, ci) top<- tn2*((sn1^2)-sn2*sn0)+tn1*(sn3*sn0-sn1*sn2)+tn0*((sn2^2)-sn1*sn3) bot<-(sn3^2) * sn0 -2*(sn1 *sn2 * sn3) - (sn0*sn2*sn4) + (sn1^2) *sn4 + sn2^2 top/bot } "Biweight"<-function(x) { #My Biweight Kernel :-) ifelse(abs(x) < 1, (15/16) * ( 1 - x^2)^2, ifelse(abs(x) > 1, 0, 0)) } ######################################W J ######################## "a0BiwWJ"<-function(a) { (15/16) * a -(5/8)*a^3+(3/16)*a^5+0.5 } "a1BiwWJ"<-function(a) { (15/32) *a^2- (15/32)* a^4+ (5/32) *a^6 - 5/32 } "a2BiwWJ"<-function(a) { (5/16)* a^3- (3/8)*a^5+(15/112)*a^7 +1/14 } ######################################W J ######################## "BoundaryBiweightWJ"<- function(x,a) { ifelse(abs(x)<1, ( a2BiwWJ(a) - a1BiwWJ(a)*x) * Biweight(x) / (a0BiwWJ(a)*a2BiwWJ(a)-(a1BiwWJ(a))^2) , ifelse(abs(x)>1, 0,0)) } "a0Epan"<-function(a) { -1/100*5^(1/2)*(a^3+5*5^(1/2))+3/20*5^(1/2)*(a+5^(1/2)) } "a1Epan"<-function(a) { -3/400*5^(1/2)*(a^4-25)+3/40*5^(1/2)*(a^2-5) } "a2Epan"<-function(a) { -3/500*5^(1/2)*(a^5+25*5^(1/2))+1/20*5^(1/2)*(a^3+5*5^(1/2)) } "BoundaryEpanechnikovWJ"<- function(x,a) { ifelse(abs(x)sqrt(5), 0,0)) } "LambdaHat"<- #Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964)) -order statistics version. function(xin, xout, h, kfun) { n<- length(xin) n1<-1:n xin.use<-sort(xin) arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2)* 1/h arg3 } "BoundaryLambdahat"<- function(xin, xout, h, kfun) { n<- length(xin) xin<-sort(xin) n1<-1:n xout1<-xout[which(abs(xout)h)] arg1bound<-(sapply(xout1, "-", xin))/h arg1notbound<-(sapply(xout2, "-", xin))/h arg2notbound<-kfun(arg1notbound) * 1/(n-n1+1) n2<-length(xout1) arg2bound<-vector("numeric", length=n2) # initialize the vector to hold the boundary points calculations for(i in 1:n2)#BoundaryBiweightWJ(arg1bound ,xout1/h) #this computes the estimate at the boundary points { arg2bound[i]<-colSums(BoundaryBiweightWJ((xout1[i]-xin)/h ,xout1[i]/h) / (n-n1+1))/h } arg2notbound<-colSums(arg2notbound)/h # reassign the result to the same variable in order to save memory. arg3<-c(arg2bound, arg2notbound) cbind(c(xout1, xout2), arg3) #return the result } "BoundaryLambdahatEpan"<- function(xin, xout, h, kfun) { n<- length(xin) xin<-sort(xin) n1<-1:n xout1<-xout[which(abs(xout)h)] arg1bound<-(sapply(xout1, "-", xin))/h arg1notbound<-(sapply(xout2, "-", xin))/h arg2notbound<-kfun(arg1notbound) * 1/(n-n1+1) n2<-length(xout1) arg2bound<-vector("numeric", length=n2) # initialize the vector to hold the boundary points calculations for(i in 1:n2) #this computes the estimate at the boundary points { arg2bound[i]<-colSums(BoundaryEpanechnikovWJ((xout1[i]-xin)/h ,xout1[i]/h) / (n-n1+1))/h } arg2notbound<-colSums(arg2notbound)/h # reassign the result to the same variable in order to save memory. arg3<-c(arg2bound, arg2notbound) cbind(c(xout1, xout2), arg3) #return the result } "LocLinEst"<-function(xin, xout, kfun, IntKfun) { MinX<-min(xin) MaxX<-max(xin) N<-length(xin) n<-length(xout) Delta<-(MaxX-MinX)/n PartInterval<-c(MinX, MinX+(1:n)*Delta) #Partion the interval (x[1], x[n]) into subintervals FirstBinCenter<-(PartInterval[1]+PartInterval[2])/2 BinCenters<- c(FirstBinCenter, FirstBinCenter+(1:(n-1))*Delta) fi<-sapply(1:n, function(i, xin, PartInterval) length(which(xin > PartInterval[i] & xin <= PartInterval[i+1])), xin, PartInterval) #crete the bin counts citmp<-fi/(N-cumsum(fi)+1) ci<-citmp/Delta #### CALCULATE THE BANDWIDTH FOR THE DERIVATIVE ESTIMATE theta22#### h<- PilotWidth1(xin) # +.5 usethis<-SecondDerivativeQuarticEstimate(xout, ci) sn0<-sn.0(BinCenters, BinCenters, h, kfun) sn1<-sn.1(BinCenters, BinCenters, h, kfun) sn2<-sn.2(BinCenters, BinCenters, h, kfun) sn3<-sn.3(BinCenters, BinCenters, h, kfun) sn4<-sn.4(BinCenters, BinCenters, h, kfun) sn5<-sn.5(BinCenters, BinCenters, h, kfun) sn6<-sn.6(BinCenters, BinCenters, h, kfun) tn0<-tn.0(BinCenters, BinCenters, h, kfun, ci) tn1<-tn.1(BinCenters, BinCenters, h, kfun, ci) tn2<-tn.2(BinCenters, BinCenters, h, kfun, ci) tn3<-tn.3(BinCenters, BinCenters, h, kfun, ci) den<- sn0*sn2*sn4*sn6 - sn0*sn2*(sn5^2) - sn0*(sn3^2) *sn6 + 2*sn0*sn3*sn4*sn5-sn0*(sn4^3)-sn1^2*sn4*sn6 +( sn1^2)*(sn5^2 )+ 2*sn1*sn2*sn3*sn6 - 2*sn1*(sn3^2) *sn5 - 2*sn1*sn4*sn2*sn5 + 2*sn1*sn3*(sn4^2) - (sn2^3) *sn6 + 2*(sn2^2)*sn3*sn5 + (sn2^2)*(sn4^2)-3*sn2*sn4*sn3^2 + sn3^4 A1<- sn1*sn3*sn6 -sn1*sn4*sn5 - (sn2^2) *sn6+sn2*sn3*sn5+sn2*sn4^2 -sn4*( sn3^2) A2<- sn0*sn3*sn6-sn0*sn4*sn5-sn2*sn1*sn6+sn3*sn2*sn4+sn1*sn3*sn5-(sn3^3) A3<- sn0*sn2*sn6-sn0*(sn4^2) -(sn1^2) *sn6+2*sn3*sn1*sn4-sn2*sn3^2 A4<- sn0*sn2*sn5 - sn0*sn3*sn4 - (sn1^2) *sn5+sn1*sn2*sn4+sn1*(sn3^2)-sn3*(sn2^2) est2<- 2* (A1*tn0 - A2*tn1 + A3*tn2 - A4*tn3)/den # cat("est2 = ", Delta * sum(est2^2)) # Compute the bandwidth estimate to use: theta22<- Delta * sum(est2^2) RofK <- 3* sqrt(5)/25 MuofK <- 3* sqrt(5) /35 Muse<-1/(1-IntKde(xin, max(xout), PilotWidth1(xin), IntKfun)) -1 #huse1<-( RofK*Muse /(N * MuofK^2 * theta22))^0.2 huse<- ( RofK*Muse /(N * MuofK^2 * Delta*sum(usethis^2)))^0.2 sn0<-sn.0(BinCenters, xout, huse, kfun) sn1<-sn.1(BinCenters, xout, huse, kfun) sn2<-sn.2(BinCenters, xout, huse, kfun) tn0<-tn.0(BinCenters, xout, huse, kfun, ci) tn1<-tn.1(BinCenters, xout, huse, kfun, ci) estuse<-round((tn1*sn1-tn0*sn2)/(sn1^2-sn0*sn2), digits =4) exportData(estuse,"ttt.xls") #wluse<-round(BoundaryLambdahat(xin, xout, PilotWidth1(xin), Biweight), digits =4) plot(xout, estuse, type="l", xlab="time (in 1000's of hours)", ylab="failure rate", ylim=c(0,1)) #lines( wluse, lty=4, col=5) } xout<-seq(0, max(suicdata), length=150) #cat(length(suicdata), "\n", 1+2*2*log10(length(suicdata)), "\n") LocLinEst(suicdata,xout,Epanechnikov, IntEpanechnikov)